#' Clear environment.
## ----------------------------------------------------------------------------------------------------------------------------
remove(list = ls())

#' 
#' Load tidyverse and readxl
## ----------------------------------------------------------------------------------------------------------------------------
library(tidyverse)
library(readxl)
library(rgdal)

# set directories
source("./Code/set_directories.R")

#' 
#' ## ###################################### ##
#' ## CREATING A TRACT DATA FRAME ##
#' ## ###################################### ##
#' 
#' Load 2010 ZCTA to Census Tract Relationship File.
#' Website: http://www2.census.gov/geo/docs/maps-data/data/rel/zcta_tract_rel_10.txt
## ----------------------------------------------------------------------------------------------------------------------------
tracts <- read.csv(file.path(DATA.PATH,"zcta_tract_rel_10.txt"), sep=",", header = TRUE, colClasses=c("ZCTA5"="character","STATE"="character","COUNTY"="character","TRACT"="character","GEOID"="character"))

#' 
#' Load State FIPS Codes and then join to tracts dataframe.
#' Website: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/?cid=nrcs143_013696
## ----------------------------------------------------------------------------------------------------------------------------
stateFIPS <- read.csv(file.path(DATA.PATH,"stateFIPS.txt"),sep=",", header = TRUE, colClasses=c("FIPS"="character"))
names(stateFIPS)[3] <- "STATE"

tracts <- left_join(x=tracts,y=stateFIPS,by="STATE")

#' 
#' Load County FIPS Codes and then join to tracts dataframe.
#' Website: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/national/home/?cid=nrcs143_013697
## ----------------------------------------------------------------------------------------------------------------------------
countyFIPS <- read.csv(file.path(DATA.PATH,"countyFIPS.txt"))
countyFIPS <- array(unlist(countyFIPS),dim=c(3,3233))
countyFIPS <- as.data.frame(t(as.matrix(countyFIPS)))
countyFIPS <- countyFIPS[-1,]
names(countyFIPS)[1] <- "CountyFIPS"
names(countyFIPS)[2] <- "CountyName"
names(countyFIPS)[3] <- "PostalCode"

countyFIPS <- countyFIPS[,-3]
countyFIPS$CountyFIPS <- as.character(countyFIPS$CountyFIPS)

tracts$CountyFIPS <- paste(tracts$STATE,tracts$COUNTY,sep="")

tracts <- left_join(x=tracts,y=countyFIPS,by="CountyFIPS")

#' 
#' Add Regions and Divisions to tracts data frame.
#' Website: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf
## ----------------------------------------------------------------------------------------------------------------------------
regiondivision <- read.csv(file.path(DATA.PATH,"regiondivision.csv"),colClasses="character") %>% 
        rename(STATE = `ï..STATE`)

tracts <- left_join(x=tracts,y=regiondivision,by="STATE")

#' 
#' Add Urban Area to tracts data frame.
#' Website: http://www2.census.gov/geo/docs/maps-data/data/rel/ua_zcta_rel_10.txt
## ----------------------------------------------------------------------------------------------------------------------------
urbanareas <- read.csv(file.path(DATA.PATH,"ua_zcta_rel_10.txt"), sep=",", header = TRUE, colClasses=c("UA"="character","ZCTA5"="character"))

## Assign to Urban Cluster, Urbanized Area, or Rural based on characteristics of the string in UANAME column
urbanareas$UrbanType <- ifelse(str_sub(urbanareas$UANAME,-1,-1) == "r", "Urban Cluster",ifelse(str_sub(urbanareas$UANAME,-6,-1) == "n area","Rural","Urbanized Area"))

## Break into separate groupings by urbanization
urbanareas.UA.only <- urbanareas[which(urbanareas$UrbanType == "Urbanized Area"),]
urbanareas.UC.only <- urbanareas[which(urbanareas$UrbanType == "Urban Cluster"),]
urbanareas.RUR.only <- urbanareas[which(urbanareas$UrbanType == "Rural"),]

## Sum rows that have duplicate ZCTA5 for UA, UC, and RUR data frames
urbanareas.UA.only <- urbanareas.UA.only[,c(3,20,23)]
names(urbanareas.UA.only)[2] <- "Percent.UA.Pop"
names(urbanareas.UA.only)[3] <- "Percent.UA.Land"
urbanareas.UA.only <- urbanareas.UA.only %>% group_by(ZCTA5) %>% summarise_all(funs(sum))

urbanareas.UC.only <- urbanareas.UC.only[,c(3,20,23)]
names(urbanareas.UC.only)[2] <- "Percent.UC.Pop"
names(urbanareas.UC.only)[3] <- "Percent.UC.Land"
urbanareas.UC.only <- urbanareas.UC.only %>% group_by(ZCTA5) %>% summarise_all(funs(sum))

urbanareas.RUR.only <- urbanareas.RUR.only[,c(3,20,23)]
names(urbanareas.RUR.only)[2] <- "Percent.RUR.Pop"
names(urbanareas.RUR.only)[3] <- "Percent.RUR.Land"
urbanareas.RUR.only <- urbanareas.RUR.only %>% group_by(ZCTA5) %>% summarise_all(funs(sum))

## Used to generate the unique list of ZCTA5
unique_zcta <- urbanareas %>% distinct(ZCTA5, .keep_all = TRUE)

## Merge the unique list of ZCTA5 with each of the types of urban areas
unique_zcta <- left_join(x=unique_zcta,y=urbanareas.UA.only,by="ZCTA5")
unique_zcta <- left_join(x=unique_zcta,y=urbanareas.UC.only,by="ZCTA5")
unique_zcta <- left_join(x=unique_zcta,y=urbanareas.RUR.only,by="ZCTA5")

## Remove unnecessary columns and reorder columns for readability
unique_zcta <- unique_zcta[c(3,25,27,29,26,28,30)]

## Combine with the tracts dataframe
tracts <- left_join(x=tracts,y=unique_zcta,by="ZCTA5")

#' 
#' Create a data frame that determines the % of the population of a tract that is in an Urbanized Area (UA), an
#' Urban Cluster (UC), and Rural (RUR).
## ----------------------------------------------------------------------------------------------------------------------------
share.holder <- tracts[c(5,22,33,34,35)]

share.holder$Percent.Tract.UA.Pop <- share.holder$TRPOPPCT * share.holder$Percent.UA.Pop / 100
share.holder$Percent.Tract.UC.Pop <- share.holder$TRPOPPCT * share.holder$Percent.UC.Pop / 100
share.holder$Percent.Tract.RUR.Pop <- share.holder$TRPOPPCT * share.holder$Percent.RUR.Pop / 100

share.holder[is.na(share.holder)] <- 0

Total.Percent.UA.Pop <- aggregate(share.holder$Percent.Tract.UA.Pop, by = list(share.holder$GEOID), sum)
names(Total.Percent.UA.Pop)[1] <- "GEOID"
names(Total.Percent.UA.Pop)[2] <- "Total.Percent.UA.Pop"

Total.Percent.UC.Pop <- aggregate(share.holder$Percent.Tract.UC.Pop, by = list(share.holder$GEOID), sum)
names(Total.Percent.UC.Pop)[1] <- "GEOID"
names(Total.Percent.UC.Pop)[2] <- "Total.Percent.UC.Pop"

Total.Percent.RUR.Pop <- aggregate(share.holder$Percent.Tract.RUR.Pop, by = list(share.holder$GEOID), sum)
names(Total.Percent.RUR.Pop)[1] <- "GEOID"
names(Total.Percent.RUR.Pop)[2] <- "Total.Percent.RUR.Pop"

urban.share <- left_join(x=Total.Percent.UA.Pop,y=Total.Percent.UC.Pop,by="GEOID")
urban.share <- left_join(x=urban.share,y=Total.Percent.RUR.Pop,by="GEOID")


#' 
#' 
#' ## ######################## ##
#' ## ACS DATA CODE ##
#' ## ######################## ##
#' 
#' Load Alabama to Missouri ACS 2017 5-Year Data for Mean and Median Income by Census Tract
#' Website: https://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t
## ----------------------------------------------------------------------------------------------------------------------------
ALtoMO_S1902 <- read.csv(file.path(DATA.PATH,"ACS_17_5YR_S1902_with_ann_ALtoMO.csv"))
ALtoMO_S1903 <- read.csv(file.path(DATA.PATH,"ACS_17_5YR_S1903_with_ann_ALtoMO.csv"))

ALtoMO_S1902_small <- ALtoMO_S1902[,c(2,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128)]
ALtoMO_S1903_small <- ALtoMO_S1903[,c(2,3,4,8,10,12,14,16,18,20,22,24,26,28,30,34,32,36,38,40,42,44,46,48,50,52,54,56,58,60,62)]

names_S1902_small <- c("GEOID","Mean.HH","Number.HH.withearnings","Percent.HH.withearnings","Mean.HH.withearnings","Number.HH.withwages","Percent.HH.withwages","Mean.HH.withwages","Number.HH.withselfemployed","Percent.HH.withselfemployed","Mean.HH.withselfemployed","Number.HH.withinterest","Percent.HH.withinterest","Mean.HH.withinterest","Number.HH.withsocsec","Percent.HH.withsocsec","Mean.HH.withsocsec","Number.HH.withSSI","Percent.HH.withSSI","Mean.HH.withSSI","Number.HH.withSNAP","Percent.HH.withSNAP","Mean.HH.withSNAP","Number.HH.withpublicassist","Percent.HH.withpublicassist","Mean.HH.withpublicassist","Number.HH.withretirement","Percent.HH.withretirement","Mean.HH.withretirement","Number.HH.withother","Percent.HH.withother","Mean.HH.withother","Number.POP","Mean.PC","Number.POP.white","Percent.POP.white","Mean.PC.white","Number.POP.black","Percent.POP.black","Mean.PC.black","Number.POP.ai","Percent.POP.ai","Mean.PC.ai","Number.POP.asian","Percent.POP.asian","Mean.PC.asian","Number.POP.pi","Percent.POP.pi","Mean.PC.pi","Number.POP.other","Percent.POP.other","Mean.PC.other","Number.POP.multi","Percent.POP.multi","Mean.PC.multi","Number.POP.latino","Percent.POP.latino","Mean.PC.latino","Number.POP.whitealone","Percent.POP.whitealone","Mean.PC.whitealone")
names_S1903_small <- c("GEOID","GeoLabel","Number.HH","Median.HH","Number.HH.white","Percent.HH.white","Median.HH.white","Number.HH.black","Percent.HH.black","Median.HH.black","Number.HH.ai","Percent.HH.ai","Median.HH.ai","Number.HH.asian","Percent.HH.asian","Median.HH.asian","Number.HH.pi","Percent.HH.pi","Median.HH.pi","Number.HH.other","Percent.HH.other","Median.HH.other","Number.HH.multi","Percent.HH.multi","Median.HH.multi","Number.HH.latino","Percent.HH.latino","Median.HH.latino","Number.HH.whitealone","Percent.HH.whitealone","Median.HH.whitealone")

colnames(ALtoMO_S1902_small)[1:ncol(ALtoMO_S1902_small)] <- names_S1902_small
colnames(ALtoMO_S1903_small)[1:ncol(ALtoMO_S1903_small)] <- names_S1903_small

ALtoMO_S1902_small <- ALtoMO_S1902_small[-c(1),]
ALtoMO_S1903_small <- ALtoMO_S1903_small[-c(1),]

ALtoMO_small <- left_join(x=ALtoMO_S1903_small,y=ALtoMO_S1902_small,by="GEOID")

ALtoMO <- left_join(x=ALtoMO_S1903_small,y=ALtoMO_S1902_small,by="GEOID")

#' 
#' Load Montana to Wyoming ACS 2017 5-Year Data for Mean and Median Income by Census Tract
#' Website: https://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t
## ----------------------------------------------------------------------------------------------------------------------------
MTtoWY_S1902 <- read.csv(file.path(DATA.PATH,"ACS_17_5YR_S1902_with_ann_MTtoWY.csv"))
MTtoWY_S1903 <- read.csv(file.path(DATA.PATH,"ACS_17_5YR_S1903_with_ann_MTtoWY.csv"))

MTtoWY_S1902_small <- MTtoWY_S1902[,c(2,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128)]
MTtoWY_S1903_small <- MTtoWY_S1903[,c(2,3,4,8,10,12,14,16,18,20,22,24,26,28,30,34,32,36,38,40,42,44,46,48,50,52,54,56,58,60,62)]

names_S1902_small <- c("GEOID","Mean.HH","Number.HH.withearnings","Percent.HH.withearnings","Mean.HH.withearnings","Number.HH.withwages","Percent.HH.withwages","Mean.HH.withwages","Number.HH.withselfemployed","Percent.HH.withselfemployed","Mean.HH.withselfemployed","Number.HH.withinterest","Percent.HH.withinterest","Mean.HH.withinterest","Number.HH.withsocsec","Percent.HH.withsocsec","Mean.HH.withsocsec","Number.HH.withSSI","Percent.HH.withSSI","Mean.HH.withSSI","Number.HH.withSNAP","Percent.HH.withSNAP","Mean.HH.withSNAP","Number.HH.withpublicassist","Percent.HH.withpublicassist","Mean.HH.withpublicassist","Number.HH.withretirement","Percent.HH.withretirement","Mean.HH.withretirement","Number.HH.withother","Percent.HH.withother","Mean.HH.withother","Number.POP","Mean.PC","Number.POP.white","Percent.POP.white","Mean.PC.white","Number.POP.black","Percent.POP.black","Mean.PC.black","Number.POP.ai","Percent.POP.ai","Mean.PC.ai","Number.POP.asian","Percent.POP.asian","Mean.PC.asian","Number.POP.pi","Percent.POP.pi","Mean.PC.pi","Number.POP.other","Percent.POP.other","Mean.PC.other","Number.POP.multi","Percent.POP.multi","Mean.PC.multi","Number.POP.latino","Percent.POP.latino","Mean.PC.latino","Number.POP.whitealone","Percent.POP.whitealone","Mean.PC.whitealone")
names_S1903_small <- c("GEOID","GeoLabel","Number.HH","Median.HH","Number.HH.white","Percent.HH.white","Median.HH.white","Number.HH.black","Percent.HH.black","Median.HH.black","Number.HH.ai","Percent.HH.ai","Median.HH.ai","Number.HH.asian","Percent.HH.asian","Median.HH.asian","Number.HH.pi","Percent.HH.pi","Median.HH.pi","Number.HH.other","Percent.HH.other","Median.HH.other","Number.HH.multi","Percent.HH.multi","Median.HH.multi","Number.HH.latino","Percent.HH.latino","Median.HH.latino","Number.HH.whitealone","Percent.HH.whitealone","Median.HH.whitealone")

colnames(MTtoWY_S1902_small)[1:ncol(MTtoWY_S1902_small)] <- names_S1902_small
colnames(MTtoWY_S1903_small)[1:ncol(MTtoWY_S1903_small)] <- names_S1903_small

MTtoWY_S1902_small <- MTtoWY_S1902_small[-c(1),]
MTtoWY_S1903_small <- MTtoWY_S1903_small[-c(1),]

MTtoWY_small <- left_join(x=MTtoWY_S1903_small,y=MTtoWY_S1902_small,by="GEOID")

MTtoWY <- left_join(x=MTtoWY_S1903_small,y=MTtoWY_S1902_small,by="GEOID")

#' 
#' Bind the ALtoMO and MTtoWY dataframes. Create HH Median Income Decile dummies. Left join with urban.share dataframe.
## ----------------------------------------------------------------------------------------------------------------------------
ACSdata <- rbind(ALtoMO,MTtoWY)
ACSdata <- ACSdata %>% mutate(Median.decile = ntile(Median.HH,10))

library(fastDummies)
ACSdata <- dummy_cols(ACSdata,select_columns = 'Median.decile')

ACSdata$GEOID <- as.character(ACSdata$GEOID)
ACSdata <- left_join(x=ACSdata,y=urban.share,by="GEOID")


#' 
#' 
#' ## ################################################### ##
#' ## Demographic analysis of who lives near power plants ##
#' ## ################################################### ##
#' 
#' Load Census Gazetteer File for tracts.
#' Website: https://www2.census.gov/geo/docs/maps-data/data/gazetteer/2017_Gazetteer/2017_Gaz_tracts_national.zip
## ----------------------------------------------------------------------------------------------------------------------------
gazetteer <- read.csv(file.path(DATA.PATH,"2017_Gaz_tracts_national.txt"), sep="", colClasses = c(GEOID = "character"),header = TRUE)
gazetteer_drop <- gazetteer[!(gazetteer$USPS=="AK" | gazetteer$USPS=="HI" | gazetteer$USPS=="PR"),] #drop Alaska, Hawaii and Puerto Rico

#' 
#' Load EIA Form 860, Schedule 2 and Schedule 3.1.
#' Website: https://www.eia.gov/electricity/data/eia860/
## ----------------------------------------------------------------------------------------------------------------------------
coltype.2013 <- rep("text", 71)
coltype.2013 <- replace(coltype.2013,c(1, 3, 15, 16, 17, 18, 19, 25, 26, 31, 54, 55, 56, 57, 58, 59, 60, 61, 64, 65, 66, 69),"guess")
operable_generators <- read_excel(file.path(DATA.PATH,"3_1_Generator_Y2013.xlsx"), col_types = coltype.2013, sheet = "Operable", skip = 1)
plants <- read_excel(file.path(DATA.PATH,"2___Plant_Y2013.xlsx"), skip = 1)

#' 
#' Combine operable_generators and plants dataframes.
## ----------------------------------------------------------------------------------------------------------------------------
op_gens <- operable_generators[,c(3,8,15,33)]
op_gens2 <- aggregate(op_gens$`Nameplate Capacity (MW)`, by = list(op_gens$`Plant Code`),sum)
colnames(op_gens2)[1] <- 'Plant Code'
colnames(op_gens2)[2] <- 'Total Capacity (MW)'
op_gens <- left_join(x = op_gens, y=op_gens2, by = 'Plant Code')

op_gens3 <- op_gens[!duplicated(op_gens$`Plant Code`),]

plants <- inner_join(x=plants,y=op_gens3,by='Plant Code')

#' 
#' Reduce data frames to that which is needed for distance calculations.
## ----------------------------------------------------------------------------------------------------------------------------
plants_geo <- plants[,c(3,10,11)]
gazette_geo <- gazetteer_drop[,c(2,7,8)]

#' 
#' Calculate nearest plant to each tract. (~9 minutes)
## ----------------------------------------------------------------------------------------------------------------------------
library(geosphere)
gazette_geo$'Nearest Plant' <- NA
gazette_geo$'Nearest Plant Distance (km)' <- NA
for (i in 1:nrow(gazette_geo)){
        latitude <- gazette_geo$INTPTLAT[i]
        longitude <- gazette_geo$INTPTLONG[i]
        number_plants <- 0
        culling <- 0.1
        while (number_plants == 0) {
          plants_for_distance <- plants_geo[which(plants_geo$Latitude<latitude+culling & plants_geo$Latitude>latitude-culling & plants_geo$Longitude<longitude+culling & plants_geo$Longitude>longitude-culling),]
          number_plants <- nrow(plants_for_distance)
          culling <- 1.5*culling
        }
        culling <- 1.5*culling
        plants_for_distance <- plants_geo[which(plants_geo$Latitude<latitude+culling & plants_geo$Latitude>latitude-culling & plants_geo$Longitude<longitude+culling & plants_geo$Longitude>longitude-culling),]
        distance_matrix <- distm(gazette_geo[i,c('INTPTLONG','INTPTLAT')], plants_for_distance[,c('Longitude','Latitude')], fun=distVincentyEllipsoid)
        gazette_geo$`Nearest Plant`[i] <- plants_for_distance$`Plant Code`[max.col(-distance_matrix)]
        gazette_geo$'Nearest Plant Distance (km)'[i] <- distance_matrix[max.col(-distance_matrix)]/1000
}


#' 
#' Combine "nearest plant" information with the ACSdata dataframe. Add plant information based on "nearest plant" and characteristics of these nearest plants.
## ----------------------------------------------------------------------------------------------------------------------------
ACSdata <- left_join(x=ACSdata,y=gazette_geo,by='GEOID')
plant_small <- plants[,c(3,10,11,19,37,39,40)]
colnames(plant_small)[1] <- 'Nearest Plant'

# Import EIA data layout "Reference Table 28" (fuel categories) and clean.
fuel.df <- read_excel("LayoutY2013.xlsx", sheet = "Reference Table 28", skip = 5)
drop.fuel <- c("...3", "...4", "...5")
fuel.df <- fuel.df[,!(names(fuel.df) %in% drop.fuel)]
colnames(fuel.df)[1] <- "Energy Source Category"
colnames(fuel.df)[2] <- "Energy Source 1"
colnames(fuel.df)[3] <- "Energy Source Description"
fuel.df <- fuel.df[-c(19,34,35,36),]
fuel.df$`Energy Source Category`[c(19,20,23,27)] <- "Renewable Fuels"
fuel.df$`Energy Source Category`[29] <- "Solar"
fuel.df$`Energy Source Category`[30] <- "Wind"
fuel.df$`Energy Source Category`[31] <- "Geothermal"
fuel.df$`Energy Source Category`[32] <- "Hydropower"
fuel.df$`Energy Source Category`[33] <- "Nuclear"
fuel.df$`Energy Source Category`[34] <- "All other energy sources"
fuel.df <- fuel.df %>% fill("Energy Source Category")
fuel.df$`Energy Source Description`[32] <- "Conventional hydroelectric power, wave power or pumping for storage"

plant_small <- left_join(x=plant_small,y=fuel.df,by='Energy Source 1')

ACSdata <- left_join(x=ACSdata,y=plant_small,by='Nearest Plant')


tracts_small <- tracts[,c(28,29,26)]
tracts_small <- tracts_small[!duplicated(tracts_small),]

ACSdata2 <- inner_join(x=ACSdata,y=tracts_small,by='CountyFIPS')

colnames(negative.env.ben.rec)[1] <- 'CountyFIPS'


#' 
#' Look at some descriptive statistics regarding demographics vs power plant location/power plant type/power plant size.
## ----------------------------------------------------------------------------------------------------------------------------
# Convert relevant columns to numeric in the ACSdata2 dataframe
ACSdata2 <- transform(ACSdata2, Number.POP = as.numeric(Number.POP), Number.POP.black = as.numeric(Number.POP.black), Number.POP.white = as.numeric(Number.POP.white), Number.POP.asian = as.numeric(Number.POP.asian), Number.POP.whitealone = as.numeric(Number.POP.whitealone), Number.POP.latino = as.numeric(Number.POP.latino))

ACSdata3 <- ACSdata2[!(ACSdata2$StateName=="Alaska" | ACSdata2$StateName=="Hawaii" | ACSdata2$StateName=="Puerto Rico"),] #drop Alaska, Hawaii and Puerto Rico

# Calculate the average nearest distance to plant
Avg_Dist_black <- sum(ACSdata3$Number.POP.black * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP.black)
Avg_Dist_whitealone <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP.whitealone)
Avg_Dist_asian <- sum(ACSdata3$Number.POP.asian * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP.asian)
Avg_Dist_latino <- sum(ACSdata3$Number.POP.latino * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP.latino)
Avg_Dist_All <- sum(ACSdata3$Number.POP * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP)

# Create dataframe to house average nearest distance to plant for black, white, asian and "All"
demographics_near_plants <- data.frame(Race = as.character(c("Black","White","Asian","Latino","All")), Avg.Distance = as.numeric(c(Avg_Dist_black,Avg_Dist_whitealone,Avg_Dist_asian,Avg_Dist_latino,Avg_Dist_All)))

# Create dummy columns for energy categories
library(fastDummies)
ACSdata3 <- dummy_cols(ACSdata3,select_columns = 'Energy.Source.Category')

# Create columns for finding generation mix percentages
demographics_near_plants$Percent.Coal <- NA
demographics_near_plants$Percent.PetroProd <- NA
demographics_near_plants$Percent.NG <- NA
demographics_near_plants$Percent.RenFuel <- NA
demographics_near_plants$Percent.Solar <- NA
demographics_near_plants$Percent.Wind <- NA
demographics_near_plants$Percent.Geo <- NA
demographics_near_plants$Percent.Hydro <- NA
demographics_near_plants$Percent.Nuclear <- NA
demographics_near_plants$Percent.AllOther <- NA

# Percentage of black population near different categories of plants
demographics_near_plants$Percent.Coal[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$Energy.Source.Category_Coal)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.PetroProd[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$`Energy.Source.Category_Petroleum Products`)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.NG[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$`Energy.Source.Category_Natural Gas and Other Gases`)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.RenFuel[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$`Energy.Source.Category_Renewable Fuels`)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.Solar[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$Energy.Source.Category_Solar)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.Wind[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$Energy.Source.Category_Wind)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.Geo[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$Energy.Source.Category_Geothermal)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.Hydro[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$Energy.Source.Category_Hydropower)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.Nuclear[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$Energy.Source.Category_Nuclear)/sum(ACSdata3$Number.POP.black)
demographics_near_plants$Percent.AllOther[1] <- sum(ACSdata3$Number.POP.black * ACSdata3$`Energy.Source.Category_All other energy sources`)/sum(ACSdata3$Number.POP.black)

# Percentage of white (alone) population near different categories of plants
demographics_near_plants$Percent.Coal[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Energy.Source.Category_Coal)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.PetroProd[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$`Energy.Source.Category_Petroleum Products`)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.NG[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$`Energy.Source.Category_Natural Gas and Other Gases`)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.RenFuel[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$`Energy.Source.Category_Renewable Fuels`)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.Solar[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Energy.Source.Category_Solar)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.Wind[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Energy.Source.Category_Wind)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.Geo[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Energy.Source.Category_Geothermal)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.Hydro[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Energy.Source.Category_Hydropower)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.Nuclear[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$Energy.Source.Category_Nuclear)/sum(ACSdata3$Number.POP.whitealone)
demographics_near_plants$Percent.AllOther[2] <- sum(ACSdata3$Number.POP.whitealone * ACSdata3$`Energy.Source.Category_All other energy sources`)/sum(ACSdata3$Number.POP.whitealone)

# Percentage of asian population near different categories of plants
demographics_near_plants$Percent.Coal[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$Energy.Source.Category_Coal)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.PetroProd[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$`Energy.Source.Category_Petroleum Products`)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.NG[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$`Energy.Source.Category_Natural Gas and Other Gases`)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.RenFuel[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$`Energy.Source.Category_Renewable Fuels`)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.Solar[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$Energy.Source.Category_Solar)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.Wind[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$Energy.Source.Category_Wind)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.Geo[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$Energy.Source.Category_Geothermal)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.Hydro[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$Energy.Source.Category_Hydropower)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.Nuclear[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$Energy.Source.Category_Nuclear)/sum(ACSdata3$Number.POP.asian)
demographics_near_plants$Percent.AllOther[3] <- sum(ACSdata3$Number.POP.asian * ACSdata3$`Energy.Source.Category_All other energy sources`)/sum(ACSdata3$Number.POP.asian)

# Percentage of latino population near different categories of plants
demographics_near_plants$Percent.Coal[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$Energy.Source.Category_Coal)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.PetroProd[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$`Energy.Source.Category_Petroleum Products`)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.NG[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$`Energy.Source.Category_Natural Gas and Other Gases`)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.RenFuel[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$`Energy.Source.Category_Renewable Fuels`)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.Solar[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$Energy.Source.Category_Solar)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.Wind[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$Energy.Source.Category_Wind)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.Geo[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$Energy.Source.Category_Geothermal)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.Hydro[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$Energy.Source.Category_Hydropower)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.Nuclear[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$Energy.Source.Category_Nuclear)/sum(ACSdata3$Number.POP.latino)
demographics_near_plants$Percent.AllOther[4] <- sum(ACSdata3$Number.POP.latino * ACSdata3$`Energy.Source.Category_All other energy sources`)/sum(ACSdata3$Number.POP.latino)

# Percentage of ALL population near different categories of plants
demographics_near_plants$Percent.Coal[5] <- sum(ACSdata3$Number.POP * ACSdata3$Energy.Source.Category_Coal)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.PetroProd[5] <- sum(ACSdata3$Number.POP * ACSdata3$`Energy.Source.Category_Petroleum Products`)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.NG[5] <- sum(ACSdata3$Number.POP * ACSdata3$`Energy.Source.Category_Natural Gas and Other Gases`)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.RenFuel[5] <- sum(ACSdata3$Number.POP * ACSdata3$`Energy.Source.Category_Renewable Fuels`)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.Solar[5] <- sum(ACSdata3$Number.POP * ACSdata3$Energy.Source.Category_Solar)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.Wind[5] <- sum(ACSdata3$Number.POP * ACSdata3$Energy.Source.Category_Wind)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.Geo[5] <- sum(ACSdata3$Number.POP * ACSdata3$Energy.Source.Category_Geothermal)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.Hydro[5] <- sum(ACSdata3$Number.POP * ACSdata3$Energy.Source.Category_Hydropower)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.Nuclear[5] <- sum(ACSdata3$Number.POP * ACSdata3$Energy.Source.Category_Nuclear)/sum(ACSdata3$Number.POP)
demographics_near_plants$Percent.AllOther[5] <- sum(ACSdata3$Number.POP * ACSdata3$`Energy.Source.Category_All other energy sources`)/sum(ACSdata3$Number.POP)

write.csv(demographics_near_plants,file.path(DATA.PROCESSED,'demographics_related_to_plants.csv'))

#' 
#' Decile Analysis
## ----------------------------------------------------------------------------------------------------------------------------
# Calculate the average nearest distance to plant for each decile
Avg_Dist_Decile_1 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_1 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_1)
Avg_Dist_Decile_2 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_2 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_2)
Avg_Dist_Decile_3 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_3 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_3)
Avg_Dist_Decile_4 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_4 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_4)
Avg_Dist_Decile_5 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_5 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_5)
Avg_Dist_Decile_6 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_6 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_6)
Avg_Dist_Decile_7 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_7 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_7)
Avg_Dist_Decile_8 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_8 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_8)
Avg_Dist_Decile_9 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_9 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_9)
Avg_Dist_Decile_10 <- sum(ACSdata3$Number.POP * ACSdata3$Median.decile_10 * ACSdata3$Nearest.Plant.Distance..km.)/sum(ACSdata3$Number.POP * ACSdata3$Median.decile_10)

# Create dataframe to house average nearest distance to plant for each decile
deciles_near_plants <- data.frame(Decile = as.character(c(1:10)), Avg.Distance = as.numeric(c(Avg_Dist_Decile_1,Avg_Dist_Decile_2,Avg_Dist_Decile_3,Avg_Dist_Decile_4,Avg_Dist_Decile_5,Avg_Dist_Decile_6,Avg_Dist_Decile_7,Avg_Dist_Decile_8,Avg_Dist_Decile_9,Avg_Dist_Decile_10)))

# Create columns for finding generation mix percentages
deciles_near_plants$Percent.Coal <- NA
deciles_near_plants$Percent.PetroProd <- NA
deciles_near_plants$Percent.NG <- NA
deciles_near_plants$Percent.RenFuel <- NA
deciles_near_plants$Percent.Solar <- NA
deciles_near_plants$Percent.Wind <- NA
deciles_near_plants$Percent.Geo <- NA
deciles_near_plants$Percent.Hydro <- NA
deciles_near_plants$Percent.Nuclear <- NA
deciles_near_plants$Percent.AllOther <- NA

# Re-order columns for loop
ACSdata3 <- ACSdata3[,c(1:93,95:102,94,103:130)]

# Run loop to populate the deciles_near_plants generation mix columns
for (i in 1:10){
        deciles_near_plants$Percent.Coal[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$Energy.Source.Category_Coal)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.PetroProd[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$`Energy.Source.Category_Petroleum Products`)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.NG[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$`Energy.Source.Category_Natural Gas and Other Gases`)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.RenFuel[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$`Energy.Source.Category_Renewable Fuels`)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.Solar[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$Energy.Source.Category_Solar)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.Wind[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$Energy.Source.Category_Wind)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.Geo[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$Energy.Source.Category_Geothermal)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.Hydro[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$Energy.Source.Category_Hydropower)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.Nuclear[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$Energy.Source.Category_Nuclear)/sum(ACSdata3$Number.POP * ACSdata3[,92+i])
        deciles_near_plants$Percent.AllOther[i] <- sum(ACSdata3$Number.POP * ACSdata3[,92+i] * ACSdata3$`Energy.Source.Category_All other energy sources`)/sum(ACSdata3$Number.POP * ACSdata3[,92+i]) 
}

write.csv(deciles_near_plants,file.path(DATA.PROCESSED,'deciles_related_to_plants.csv'))

